Do the modules recapitulate sub-cell types clusters/states?
degs_sn = "/pastel/resources/sn_columbia_degs/Green_sn_may2024/SupplementaryTable2_AtlasCharacterization1.xlsx"
degs_sn = read_excel(degs_sn, sheet = "DEGs", col_names = TRUE)
degs_sn = as.data.frame(degs_sn)
# createDT(degs_sn)#############################################
### Microglia states vs Microglia modules
#############################################
microglia_states_df = degs_sn %>%
filter(cell.type == "microglia") %>%
group_by(cell.type,state) %>%
reframe(n_genes = n(), gene, avg_log2FC, p_val_adj) %>%
mutate(p_val_adj = ifelse(p_val_adj == 0, 1e-310, p_val_adj),
category = paste0(cell.type, "_", state)) %>%
group_by(category) %>%
mutate(stats = (p_val_adj*avg_log2FC)) %>%
filter(!is.na(state))
degs_sn_clean = microglia_states_df
genes_lists = list()
for(cat_i in unique(degs_sn_clean$category)){
genes_lists[[cat_i]] = unique(degs_sn_clean$gene[degs_sn_clean$category == cat_i])
}
genes_lists4GSEA = list()
for(cat_i in unique(degs_sn_clean$category)){
genes_lists4GSEA[[cat_i]] = setNames(degs_sn_clean$stats[degs_sn_clean$category == cat_i], degs_sn_clean$gene[degs_sn_clean$category == cat_i])
}
### Modules from SE
modules_file = read.table(paste0(net_dir, "mic/geneBycluster.txt"), header = T)
modules_size = as.data.frame( table(modules_file$cluster_lv3))
colnames(modules_size) = c("module", "n_nodes")
too_small = as.character( modules_size$module[modules_size$n_nodes < 30]) # Get modules < 30 nodes to be removed
clusters_list = as.character(unique(modules_file$cluster_lv3))
clusters_list = clusters_list[!clusters_list %in% too_small]
# Build list of outputs
output <- list()
for(i in 1:length(clusters_list)){
m_cluster = clusters_list[i]
module <- subset(modules_file, modules_file$cluster_lv3==m_cluster, select = c("gene_name"))
# Header for iteration
txt <- paste0("M", m_cluster, " (", length(module$gene_name), ")" )
output[[txt]] <- module$gene_name
}
# Overlap
gom.obj <- newGOM(genes_lists, output, genome.size = nrow(modules_file), spec=c('hg19.gene'))
pval_table = getMatrix(gom.obj, name="pval")
odds_table = getMatrix(gom.obj, "odds.ratio")
jaccard_table = getMatrix(gom.obj, "Jaccard")
mod_intersection = getMatrix(gom.obj, name="intersection")
rownames_plot = paste0(names(genes_lists)," (",sapply(genes_lists, length),")")
rownames(pval_table) = rownames_plot
rownames(odds_table) = rownames_plot
rownames(jaccard_table) = rownames_plot
rownames(mod_intersection) = rownames_plot
pval_bonf_df = as.data.frame(pval_table) %>%
mutate(across(dplyr::everything(), ~ p.adjust(.x, method = "bonferroni", n = length(.x)))) %>%
as.data.frame()
pval_table.signif <- symnum(as.matrix(pval_bonf_df), corr = FALSE, na = FALSE,
cutpoints = c(0, 0.05, 1),
symbols = c(quote("\u2731")," "))
col_fun = colorRampPalette(c("white","#9467BD"))(5)
# grDevices::cairo_pdf(file = paste0(work_dir, "jaccard_mic.pdf"), width = 16, height = 6)
ComplexHeatmap::Heatmap(jaccard_table,
col = col_fun, name = "Jaccard Index",
rect_gp = gpar(col = "white", lwd = 0.2), #lwd is the white line grid. 0.2 for 23 modules
column_title = "",
row_title = "Modules", row_names_side = c("left"),
row_order = order(sapply(genes_lists, length)),
cluster_columns = T, show_column_dend = T,
cluster_rows = T, show_row_dend = T,
cell_fun = function(j, i, x, y, width, height, fill) {
grid.text( pval_table.signif[i,j], x, y, gp = gpar(fontsize = 12))
# grid.text(sprintf("%d\n(%s)", mod_intersection[i, j],formatC(pval_table[i, j], format = "e", digits = 1)), x, y, gp = gpar(fontsize = 9)) #fontsize = 10
}) mod_intersection_list = getNestedList(gom.obj, name="intersection")
mod_intersection_list2 = lapply(mod_intersection_list, function(x) {
x_df = map_df(x, ~{
paste(.x, collapse = ",")
}, .id = "id") %>% t() %>% as.data.frame()
x_df = x_df %>% rownames_to_column("subcell_pop")
colnames(x_df) = c("subcell_pop", "genes")
x_df
})
mod_intersection_df = map_df(mod_intersection_list2, bind_rows,.id = "module")
mod_intersection_df = mod_intersection_df[,c("subcell_pop", "module", "genes")] %>%
mutate(cell_mod = paste0(gsub("(.*) (.*)","\\1",subcell_pop), "_", gsub("(.*) (.*)","\\1",module)))
mod_intersection_num_df = as.data.frame(mod_intersection) %>% rownames_to_column("subcell_pop") %>%
pivot_longer(cols = -subcell_pop,
names_to = "module",
values_to = "intersection") %>%
mutate(cell_mod = paste0(gsub("(.*) (.*)","\\1",subcell_pop), "_", gsub("(.*) (.*)","\\1",module)))
mod_pval_bonf_df = pval_bonf_df %>% rownames_to_column("subcell_pop") %>%
pivot_longer(cols = -subcell_pop,
names_to = "module",
values_to = "bonf_pval") %>%
mutate(cell_mod = paste0(gsub("(.*) (.*)","\\1",subcell_pop), "_", gsub("(.*) (.*)","\\1",module)))
mod_stats = mod_pval_bonf_df %>%
left_join(mod_intersection_num_df[,c("cell_mod","intersection")], by = c("cell_mod")) %>%
left_join(mod_intersection_df[,c("cell_mod","genes")], by = c("cell_mod")) %>% distinct() %>%
arrange(bonf_pval)
createDT(mod_stats)#############################################
### Astrocytes states vs Astrocytes modules
#############################################
astrocytes_states_df = degs_sn %>%
filter(cell.type == "astrocytes") %>%
group_by(cell.type,state) %>%
reframe(n_genes = n(), gene, avg_log2FC, p_val_adj) %>%
mutate(p_val_adj = ifelse(p_val_adj == 0, 1e-310, p_val_adj),
category = paste0(cell.type, "_", state)) %>%
group_by(category) %>%
mutate(stats = (p_val_adj*avg_log2FC)) %>%
filter(!is.na(state))
degs_sn_clean = astrocytes_states_df
genes_lists = list()
for(cat_i in unique(degs_sn_clean$category)){
genes_lists[[cat_i]] = degs_sn_clean$gene[degs_sn_clean$category == cat_i]
}
genes_lists4GSEA = list()
for(cat_i in unique(degs_sn_clean$category)){
genes_lists4GSEA[[cat_i]] = setNames(degs_sn_clean$stats[degs_sn_clean$category == cat_i], degs_sn_clean$gene[degs_sn_clean$category == cat_i])
}
### Modules from SE
modules_file = read.table(paste0(net_dir, "ast/geneBycluster.txt"), header = T)
modules_size = as.data.frame( table(modules_file$cluster_lv3))
colnames(modules_size) = c("module", "n_nodes")
too_small = as.character( modules_size$module[modules_size$n_nodes < 30]) # Get modules < 30 nodes to be removed
clusters_list = as.character(unique(modules_file$cluster_lv3))
clusters_list = clusters_list[!clusters_list %in% too_small]
# Build list of outputs
output <- list()
for(i in 1:length(clusters_list)){
m_cluster = clusters_list[i]
module <- subset(modules_file, modules_file$cluster_lv3==m_cluster, select = c("gene_name"))
# Header for iteration
txt <- paste0("M", m_cluster, " (", length(module$gene_name), ")" )
output[[txt]] <- module$gene_name
}
# Overlap
gom.obj <- newGOM(genes_lists, output, genome.size = nrow(modules_file), spec=c('hg19.gene'))
pval_table = getMatrix(gom.obj, name="pval")
odds_table = getMatrix(gom.obj, "odds.ratio")
jaccard_table = getMatrix(gom.obj, "Jaccard")
mod_intersection = getMatrix(gom.obj, name="intersection")
rownames_plot = paste0(names(genes_lists)," (",sapply(genes_lists, length),")")
rownames(pval_table) = rownames_plot
rownames(odds_table) = rownames_plot
rownames(jaccard_table) = rownames_plot
rownames(mod_intersection) = rownames_plot
pval_bonf_df = as.data.frame(pval_table) %>%
mutate(across(dplyr::everything(), ~ p.adjust(.x, method = "bonferroni", n = length(.x)))) %>%
as.data.frame()
pval_table.signif <- symnum(as.matrix(pval_bonf_df), corr = FALSE, na = FALSE,
cutpoints = c(0, 0.05, 1),
symbols = c(quote("\u2731")," "))
col_fun = colorRampPalette(c("white","#1F77B4"))(5)
# grDevices::cairo_pdf(file = paste0(work_dir, "jaccard_ast.pdf"), width = 13, height = 4)
ComplexHeatmap::Heatmap(jaccard_table,
col = col_fun, name = "Jaccard Index",
rect_gp = gpar(col = "white", lwd = 0.2), #lwd is the white line grid. 0.2 for 23 modules
column_title = "",
row_title = "Modules", row_names_side = c("left"),
row_order = order(sapply(genes_lists, length)),
cluster_columns = T, show_column_dend = T,
cluster_rows = T, show_row_dend = T,
cell_fun = function(j, i, x, y, width, height, fill) {
grid.text( pval_table.signif[i,j], x, y, gp = gpar(fontsize = 12))
# grid.text(sprintf("%d\n(%s)", mod_intersection[i, j],formatC(pval_table[i, j], format = "e", digits = 1)), x, y, gp = gpar(fontsize = 9)) #fontsize = 10
}) mod_intersection_list = getNestedList(gom.obj, name="intersection")
mod_intersection_list2 = lapply(mod_intersection_list, function(x) {
x_df = map_df(x, ~{
paste(.x, collapse = ",")
}, .id = "id") %>% t() %>% as.data.frame()
x_df = x_df %>% rownames_to_column("subcell_pop")
colnames(x_df) = c("subcell_pop", "genes")
x_df
})
mod_intersection_df = map_df(mod_intersection_list2, bind_rows,.id = "module")
mod_intersection_df = mod_intersection_df[,c("subcell_pop", "module", "genes")] %>%
mutate(cell_mod = paste0(gsub("(.*) (.*)","\\1",subcell_pop), "_", gsub("(.*) (.*)","\\1",module)))
mod_intersection_num_df = as.data.frame(mod_intersection) %>% rownames_to_column("subcell_pop") %>%
pivot_longer(cols = -subcell_pop,
names_to = "module",
values_to = "intersection") %>%
mutate(cell_mod = paste0(gsub("(.*) (.*)","\\1",subcell_pop), "_", gsub("(.*) (.*)","\\1",module)))
mod_pval_bonf_df = pval_bonf_df %>% rownames_to_column("subcell_pop") %>%
pivot_longer(cols = -subcell_pop,
names_to = "module",
values_to = "bonf_pval") %>%
mutate(cell_mod = paste0(gsub("(.*) (.*)","\\1",subcell_pop), "_", gsub("(.*) (.*)","\\1",module)))
mod_stats = mod_pval_bonf_df %>%
left_join(mod_intersection_num_df[,c("cell_mod","intersection")], by = c("cell_mod")) %>%
left_join(mod_intersection_df[,c("cell_mod","genes")], by = c("cell_mod")) %>% distinct() %>%
arrange(bonf_pval)
createDT(mod_stats)#############################################
### excitatory states vs excitatory modules
#############################################
excitatory_states_df = degs_sn %>%
filter(cell.type == "excitatory") %>%
group_by(cell.type,state) %>%
reframe(n_genes = n(), gene, avg_log2FC, p_val_adj) %>%
mutate(p_val_adj = ifelse(p_val_adj == 0, 1e-310, p_val_adj),
category = paste0(cell.type, "_", state)) %>%
group_by(category) %>%
mutate(stats = (p_val_adj*avg_log2FC)) %>%
filter(!is.na(state))
degs_sn_clean = excitatory_states_df
genes_lists = list()
for(cat_i in unique(degs_sn_clean$category)){
genes_lists[[cat_i]] = degs_sn_clean$gene[degs_sn_clean$category == cat_i]
}
genes_lists4GSEA = list()
for(cat_i in unique(degs_sn_clean$category)){
genes_lists4GSEA[[cat_i]] = setNames(degs_sn_clean$stats[degs_sn_clean$category == cat_i], degs_sn_clean$gene[degs_sn_clean$category == cat_i])
}
### Modules from SE
modules_file = read.table(paste0(net_dir, "ext/geneBycluster.txt"), header = T)
modules_size = as.data.frame( table(modules_file$cluster_lv3))
colnames(modules_size) = c("module", "n_nodes")
too_small = as.character( modules_size$module[modules_size$n_nodes < 30]) # Get modules < 30 nodes to be removed
clusters_list = as.character(unique(modules_file$cluster_lv3))
clusters_list = clusters_list[!clusters_list %in% too_small]
# Build list of outputs
output <- list()
for(i in 1:length(clusters_list)){
m_cluster = clusters_list[i]
module <- subset(modules_file, modules_file$cluster_lv3==m_cluster, select = c("gene_name"))
# Header for iteration
txt <- paste0("M", m_cluster, " (", length(module$gene_name), ")" )
output[[txt]] <- module$gene_name
}
# Overlap
gom.obj <- newGOM(genes_lists, output, genome.size = nrow(modules_file), spec=c('hg19.gene'))
pval_table = getMatrix(gom.obj, name="pval")
odds_table = getMatrix(gom.obj, "odds.ratio")
jaccard_table = getMatrix(gom.obj, "Jaccard")
mod_intersection = getMatrix(gom.obj, name="intersection")
rownames_plot = paste0(names(genes_lists)," (",sapply(genes_lists, length),")")
rownames(pval_table) = rownames_plot
rownames(odds_table) = rownames_plot
rownames(jaccard_table) = rownames_plot
rownames(mod_intersection) = rownames_plot
pval_bonf_df = as.data.frame(pval_table) %>%
mutate(across(dplyr::everything(), ~ p.adjust(.x, method = "bonferroni", n = length(.x)))) %>%
as.data.frame()
pval_table.signif <- symnum(as.matrix(pval_bonf_df), corr = FALSE, na = FALSE,
cutpoints = c(0, 0.05, 1),
symbols = c(quote("\u2731")," "))
col_fun = colorRampPalette(c("white","#2ca02c"))(5)
ComplexHeatmap::Heatmap(jaccard_table,
col = col_fun, name = "Jaccard Index",
rect_gp = gpar(col = "white", lwd = 0.2), #lwd is the white line grid. 0.2 for 23 modules
column_title = "",
row_title = "Modules", row_names_side = c("left"),
row_order = order(sapply(genes_lists, length)),
cluster_columns = T, show_column_dend = T,
cluster_rows = T, show_row_dend = T,
cell_fun = function(j, i, x, y, width, height, fill) {
grid.text( pval_table.signif[i,j], x, y, gp = gpar(fontsize = 12))
# grid.text(sprintf("%d\n(%s)", mod_intersection[i, j],formatC(pval_table[i, j], format = "e", digits = 1)), x, y, gp = gpar(fontsize = 9)) #fontsize = 10
}) mod_intersection_list = getNestedList(gom.obj, name="intersection")
mod_intersection_list2 = lapply(mod_intersection_list, function(x) {
x_df = map_df(x, ~{
paste(.x, collapse = ",")
}, .id = "id") %>% t() %>% as.data.frame()
x_df = x_df %>% rownames_to_column("subcell_pop")
colnames(x_df) = c("subcell_pop", "genes")
x_df
})
mod_intersection_df = map_df(mod_intersection_list2, bind_rows,.id = "module")
mod_intersection_df = mod_intersection_df[,c("subcell_pop", "module", "genes")] %>%
mutate(cell_mod = paste0(gsub("(.*) (.*)","\\1",subcell_pop), "_", gsub("(.*) (.*)","\\1",module)))
mod_intersection_num_df = as.data.frame(mod_intersection) %>% rownames_to_column("subcell_pop") %>%
pivot_longer(cols = -subcell_pop,
names_to = "module",
values_to = "intersection") %>%
mutate(cell_mod = paste0(gsub("(.*) (.*)","\\1",subcell_pop), "_", gsub("(.*) (.*)","\\1",module)))
mod_pval_bonf_df = pval_bonf_df %>% rownames_to_column("subcell_pop") %>%
pivot_longer(cols = -subcell_pop,
names_to = "module",
values_to = "bonf_pval") %>%
mutate(cell_mod = paste0(gsub("(.*) (.*)","\\1",subcell_pop), "_", gsub("(.*) (.*)","\\1",module)))
mod_stats = mod_pval_bonf_df %>%
left_join(mod_intersection_num_df[,c("cell_mod","intersection")], by = c("cell_mod")) %>%
left_join(mod_intersection_df[,c("cell_mod","genes")], by = c("cell_mod")) %>% distinct() %>%
arrange(bonf_pval)
createDT(mod_stats)#############################################
### inhibitory states vs inhibitory modules
#############################################
inhibitory_states_df = degs_sn %>%
filter(cell.type == "inhibitory") %>%
group_by(cell.type,state) %>%
reframe(n_genes = n(), gene, avg_log2FC, p_val_adj) %>%
mutate(p_val_adj = ifelse(p_val_adj == 0, 1e-310, p_val_adj),
category = paste0(cell.type, "_", state)) %>%
group_by(category) %>%
mutate(stats = (p_val_adj*avg_log2FC)) %>%
filter(!is.na(state))
degs_sn_clean = inhibitory_states_df
genes_lists = list()
for(cat_i in unique(degs_sn_clean$category)){
genes_lists[[cat_i]] = degs_sn_clean$gene[degs_sn_clean$category == cat_i]
}
genes_lists4GSEA = list()
for(cat_i in unique(degs_sn_clean$category)){
genes_lists4GSEA[[cat_i]] = setNames(degs_sn_clean$stats[degs_sn_clean$category == cat_i], degs_sn_clean$gene[degs_sn_clean$category == cat_i])
}
### Modules from SE
modules_file = read.table(paste0(net_dir, "inh/geneBycluster.txt"), header = T)
modules_size = as.data.frame( table(modules_file$cluster_lv3))
colnames(modules_size) = c("module", "n_nodes")
too_small = as.character( modules_size$module[modules_size$n_nodes < 30]) # Get modules < 30 nodes to be removed
clusters_list = as.character(unique(modules_file$cluster_lv3))
clusters_list = clusters_list[!clusters_list %in% too_small]
# Build list of outputs
output <- list()
for(i in 1:length(clusters_list)){
m_cluster = clusters_list[i]
module <- subset(modules_file, modules_file$cluster_lv3==m_cluster, select = c("gene_name"))
# Header for iteration
txt <- paste0("M", m_cluster, " (", length(module$gene_name), ")" )
output[[txt]] <- module$gene_name
}
# Overlap
gom.obj <- newGOM(genes_lists, output, genome.size = nrow(modules_file), spec=c('hg19.gene'))
pval_table = getMatrix(gom.obj, name="pval")
odds_table = getMatrix(gom.obj, "odds.ratio")
jaccard_table = getMatrix(gom.obj, "Jaccard")
mod_intersection = getMatrix(gom.obj, name="intersection")
rownames_plot = paste0(names(genes_lists)," (",sapply(genes_lists, length),")")
rownames(pval_table) = rownames_plot
rownames(odds_table) = rownames_plot
rownames(jaccard_table) = rownames_plot
rownames(mod_intersection) = rownames_plot
pval_bonf_df = as.data.frame(pval_table) %>%
mutate(across(dplyr::everything(), ~ p.adjust(.x, method = "bonferroni", n = length(.x)))) %>%
as.data.frame()
pval_table.signif <- symnum(as.matrix(pval_bonf_df), corr = FALSE, na = FALSE,
cutpoints = c(0, 0.05, 1),
symbols = c(quote("\u2731")," "))
col_fun = colorRampPalette(c("white","#d62728"))(5)
ComplexHeatmap::Heatmap(jaccard_table,
col = col_fun, name = "Jaccard Index",
rect_gp = gpar(col = "white", lwd = 0.2), #lwd is the white line grid. 0.2 for 23 modules
column_title = "",
row_title = "Modules", row_names_side = c("left"),
row_order = order(sapply(genes_lists, length)),
cluster_columns = T, show_column_dend = T,
cluster_rows = T, show_row_dend = T,
cell_fun = function(j, i, x, y, width, height, fill) {
grid.text( pval_table.signif[i,j], x, y, gp = gpar(fontsize = 12))
# grid.text(sprintf("%d\n(%s)", mod_intersection[i, j],formatC(pval_table[i, j], format = "e", digits = 1)), x, y, gp = gpar(fontsize = 9)) #fontsize = 10
}) mod_intersection_list = getNestedList(gom.obj, name="intersection")
mod_intersection_list2 = lapply(mod_intersection_list, function(x) {
x_df = map_df(x, ~{
paste(.x, collapse = ",")
}, .id = "id") %>% t() %>% as.data.frame()
x_df = x_df %>% rownames_to_column("subcell_pop")
colnames(x_df) = c("subcell_pop", "genes")
x_df
})
mod_intersection_df = map_df(mod_intersection_list2, bind_rows,.id = "module")
mod_intersection_df = mod_intersection_df[,c("subcell_pop", "module", "genes")] %>%
mutate(cell_mod = paste0(gsub("(.*) (.*)","\\1",subcell_pop), "_", gsub("(.*) (.*)","\\1",module)))
mod_intersection_num_df = as.data.frame(mod_intersection) %>% rownames_to_column("subcell_pop") %>%
pivot_longer(cols = -subcell_pop,
names_to = "module",
values_to = "intersection") %>%
mutate(cell_mod = paste0(gsub("(.*) (.*)","\\1",subcell_pop), "_", gsub("(.*) (.*)","\\1",module)))
mod_pval_bonf_df = pval_bonf_df %>% rownames_to_column("subcell_pop") %>%
pivot_longer(cols = -subcell_pop,
names_to = "module",
values_to = "bonf_pval") %>%
mutate(cell_mod = paste0(gsub("(.*) (.*)","\\1",subcell_pop), "_", gsub("(.*) (.*)","\\1",module)))
mod_stats = mod_pval_bonf_df %>%
left_join(mod_intersection_num_df[,c("cell_mod","intersection")], by = c("cell_mod")) %>%
left_join(mod_intersection_df[,c("cell_mod","genes")], by = c("cell_mod")) %>% distinct() %>%
arrange(bonf_pval)
createDT(mod_stats)#############################################
### oligodendroglia states vs oligodendroglia modules
#############################################
oligodendroglia_states_df = degs_sn %>%
filter(cell.type == "oligodendroglia") %>%
group_by(cell.type,state) %>%
reframe(n_genes = n(), gene, avg_log2FC, p_val_adj) %>%
mutate(p_val_adj = ifelse(p_val_adj == 0, 1e-310, p_val_adj),
category = paste0(cell.type, "_", state)) %>%
group_by(category) %>%
mutate(stats = (p_val_adj*avg_log2FC)) %>%
filter(!is.na(state))
degs_sn_clean = oligodendroglia_states_df
genes_lists = list()
for(cat_i in unique(degs_sn_clean$category)){
genes_lists[[cat_i]] = degs_sn_clean$gene[degs_sn_clean$category == cat_i]
}
genes_lists4GSEA = list()
for(cat_i in unique(degs_sn_clean$category)){
genes_lists4GSEA[[cat_i]] = setNames(degs_sn_clean$stats[degs_sn_clean$category == cat_i], degs_sn_clean$gene[degs_sn_clean$category == cat_i])
}
### Modules from SE
modules_file = read.table(paste0(net_dir, "oli/geneBycluster.txt"), header = T)
modules_size = as.data.frame( table(modules_file$cluster_lv3))
colnames(modules_size) = c("module", "n_nodes")
too_small = as.character( modules_size$module[modules_size$n_nodes < 30]) # Get modules < 30 nodes to be removed
clusters_list = as.character(unique(modules_file$cluster_lv3))
clusters_list = clusters_list[!clusters_list %in% too_small]
# Build list of outputs
output <- list()
for(i in 1:length(clusters_list)){
m_cluster = clusters_list[i]
module <- subset(modules_file, modules_file$cluster_lv3==m_cluster, select = c("gene_name"))
# Header for iteration
txt <- paste0("M", m_cluster, " (", length(module$gene_name), ")" )
output[[txt]] <- module$gene_name
}
# Overlap
gom.obj <- newGOM(genes_lists, output, genome.size = nrow(modules_file), spec=c('hg19.gene'))
pval_table = getMatrix(gom.obj, name="pval")
odds_table = getMatrix(gom.obj, "odds.ratio")
jaccard_table = getMatrix(gom.obj, "Jaccard")
mod_intersection = getMatrix(gom.obj, name="intersection")
rownames_plot = paste0(names(genes_lists)," (",sapply(genes_lists, length),")")
rownames(pval_table) = rownames_plot
rownames(odds_table) = rownames_plot
rownames(jaccard_table) = rownames_plot
rownames(mod_intersection) = rownames_plot
pval_bonf_df = as.data.frame(pval_table) %>%
mutate(across(dplyr::everything(), ~ p.adjust(.x, method = "bonferroni", n = length(.x)))) %>%
as.data.frame()
pval_table.signif <- symnum(as.matrix(pval_bonf_df), corr = FALSE, na = FALSE,
cutpoints = c(0, 0.05, 1),
symbols = c(quote("\u2731")," "))
col_fun = colorRampPalette(c("white","#e377c2"))(5)
ComplexHeatmap::Heatmap(jaccard_table,
col = col_fun, name = "Jaccard Index",
rect_gp = gpar(col = "white", lwd = 0.2), #lwd is the white line grid. 0.2 for 23 modules
column_title = "",
row_title = "Modules", row_names_side = c("left"),
row_order = order(sapply(genes_lists, length)),
cluster_columns = T, show_column_dend = T,
cluster_rows = T, show_row_dend = T,
cell_fun = function(j, i, x, y, width, height, fill) {
grid.text( pval_table.signif[i,j], x, y, gp = gpar(fontsize = 12))
# grid.text(sprintf("%d\n(%s)", mod_intersection[i, j],formatC(pval_table[i, j], format = "e", digits = 1)), x, y, gp = gpar(fontsize = 9)) #fontsize = 10
}) mod_intersection_list = getNestedList(gom.obj, name="intersection")
mod_intersection_list2 = lapply(mod_intersection_list, function(x) {
x_df = map_df(x, ~{
paste(.x, collapse = ",")
}, .id = "id") %>% t() %>% as.data.frame()
x_df = x_df %>% rownames_to_column("subcell_pop")
colnames(x_df) = c("subcell_pop", "genes")
x_df
})
mod_intersection_df = map_df(mod_intersection_list2, bind_rows,.id = "module")
mod_intersection_df = mod_intersection_df[,c("subcell_pop", "module", "genes")] %>%
mutate(cell_mod = paste0(gsub("(.*) (.*)","\\1",subcell_pop), "_", gsub("(.*) (.*)","\\1",module)))
mod_intersection_num_df = as.data.frame(mod_intersection) %>% rownames_to_column("subcell_pop") %>%
pivot_longer(cols = -subcell_pop,
names_to = "module",
values_to = "intersection") %>%
mutate(cell_mod = paste0(gsub("(.*) (.*)","\\1",subcell_pop), "_", gsub("(.*) (.*)","\\1",module)))
mod_pval_bonf_df = pval_bonf_df %>% rownames_to_column("subcell_pop") %>%
pivot_longer(cols = -subcell_pop,
names_to = "module",
values_to = "bonf_pval") %>%
mutate(cell_mod = paste0(gsub("(.*) (.*)","\\1",subcell_pop), "_", gsub("(.*) (.*)","\\1",module)))
mod_stats = mod_pval_bonf_df %>%
left_join(mod_intersection_num_df[,c("cell_mod","intersection")], by = c("cell_mod")) %>%
left_join(mod_intersection_df[,c("cell_mod","genes")], by = c("cell_mod")) %>% distinct() %>%
arrange(bonf_pval)
createDT(mod_stats)#############################################
### vascular states vs end modules
#############################################
vascular_states_df = degs_sn %>%
filter(cell.type == "vascular niche") %>%
group_by(cell.type,state) %>%
reframe(n_genes = n(), gene, avg_log2FC, p_val_adj) %>%
mutate(p_val_adj = ifelse(p_val_adj == 0, 1e-310, p_val_adj),
category = paste0(cell.type, "_", state)) %>%
group_by(category) %>%
mutate(stats = (p_val_adj*avg_log2FC)) %>%
filter(!is.na(state))
degs_sn_clean = vascular_states_df
genes_lists = list()
for(cat_i in unique(degs_sn_clean$category)){
genes_lists[[cat_i]] = degs_sn_clean$gene[degs_sn_clean$category == cat_i]
}
genes_lists4GSEA = list()
for(cat_i in unique(degs_sn_clean$category)){
genes_lists4GSEA[[cat_i]] = setNames(degs_sn_clean$stats[degs_sn_clean$category == cat_i], degs_sn_clean$gene[degs_sn_clean$category == cat_i])
}
### Modules from SE
modules_file = read.table(paste0(net_dir, "end/geneBycluster.txt"), header = T)
modules_size = as.data.frame( table(modules_file$cluster_lv3))
colnames(modules_size) = c("module", "n_nodes")
too_small = as.character( modules_size$module[modules_size$n_nodes < 30]) # Get modules < 30 nodes to be removed
clusters_list = as.character(unique(modules_file$cluster_lv3))
clusters_list = clusters_list[!clusters_list %in% too_small]
# Build list of outputs
output <- list()
for(i in 1:length(clusters_list)){
m_cluster = clusters_list[i]
module <- subset(modules_file, modules_file$cluster_lv3==m_cluster, select = c("gene_name"))
# Header for iteration
txt <- paste0("M", m_cluster, " (", length(module$gene_name), ")" )
output[[txt]] <- module$gene_name
}
# Overlap
gom.obj <- newGOM(genes_lists, output, genome.size = nrow(modules_file), spec=c('hg19.gene'))
pval_table = getMatrix(gom.obj, name="pval")
odds_table = getMatrix(gom.obj, "odds.ratio")
jaccard_table = getMatrix(gom.obj, "Jaccard")
mod_intersection = getMatrix(gom.obj, name="intersection")
rownames_plot = paste0(names(genes_lists)," (",sapply(genes_lists, length),")")
rownames(pval_table) = rownames_plot
rownames(odds_table) = rownames_plot
rownames(jaccard_table) = rownames_plot
rownames(mod_intersection) = rownames_plot
pval_bonf_df = as.data.frame(pval_table) %>%
mutate(across(dplyr::everything(), ~ p.adjust(.x, method = "bonferroni", n = length(.x)))) %>%
as.data.frame()
pval_table.signif <- symnum(as.matrix(pval_bonf_df), corr = FALSE, na = FALSE,
cutpoints = c(0, 0.05, 1),
symbols = c(quote("\u2731")," "))
col_fun = colorRampPalette(c("white","#ff7f0e"))(5)
ComplexHeatmap::Heatmap(jaccard_table,
col = col_fun, name = "Jaccard Index",
rect_gp = gpar(col = "white", lwd = 0.2), #lwd is the white line grid. 0.2 for 23 modules
column_title = "",
row_title = "Modules", row_names_side = c("left"),
row_order = order(sapply(genes_lists, length)),
cluster_columns = T, show_column_dend = T,
cluster_rows = T, show_row_dend = T,
cell_fun = function(j, i, x, y, width, height, fill) {
grid.text( pval_table.signif[i,j], x, y, gp = gpar(fontsize = 12))
# grid.text(sprintf("%d\n(%s)", mod_intersection[i, j],formatC(pval_table[i, j], format = "e", digits = 1)), x, y, gp = gpar(fontsize = 9)) #fontsize = 10
}) ## R version 4.1.2 (2021-11-01)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: CentOS Stream 8
##
## Matrix products: default
## BLAS/LAPACK: /usr/lib64/libopenblasp-r0.3.15.so
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] grid stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] gtools_3.9.5 RColorBrewer_1.1-3 GeneOverlap_1.30.0 lubridate_1.9.3
## [5] forcats_1.0.0 stringr_1.5.1 dplyr_1.1.4 purrr_1.0.2
## [9] readr_2.1.5 tidyr_1.3.1 tibble_3.2.1 ggplot2_3.5.1
## [13] tidyverse_2.0.0 readxl_1.4.3
##
## loaded via a namespace (and not attached):
## [1] sass_0.4.9 jsonlite_1.8.8 foreach_1.5.2
## [4] bslib_0.8.0 highr_0.11 stats4_4.1.2
## [7] cellranger_1.1.0 yaml_2.3.10 pillar_1.9.0
## [10] glue_1.7.0 digest_0.6.36 colorspace_2.1-1
## [13] htmltools_0.5.8.1 pkgconfig_2.0.3 GetoptLong_1.0.5
## [16] magick_2.8.4 scales_1.3.0 tzdb_0.4.0
## [19] timechange_0.3.0 generics_0.1.3 IRanges_2.28.0
## [22] DT_0.33 cachem_1.1.0 withr_3.0.0
## [25] BiocGenerics_0.40.0 cli_3.6.3 magrittr_2.0.3
## [28] crayon_1.5.3 evaluate_0.24.0 fansi_1.0.6
## [31] doParallel_1.0.17 gplots_3.1.3.1 Cairo_1.6-2
## [34] tools_4.1.2 hms_1.1.3 GlobalOptions_0.1.2
## [37] lifecycle_1.0.4 matrixStats_1.3.0 ComplexHeatmap_2.15.4
## [40] S4Vectors_0.32.4 munsell_0.5.1 cluster_2.1.2
## [43] compiler_4.1.2 jquerylib_0.1.4 caTools_1.18.2
## [46] rlang_1.1.4 iterators_1.0.14 rstudioapi_0.16.0
## [49] htmlwidgets_1.6.4 rjson_0.2.21 circlize_0.4.16
## [52] crosstalk_1.2.1 bitops_1.0-8 rmarkdown_2.27
## [55] gtable_0.3.5 codetools_0.2-18 R6_2.5.1
## [58] knitr_1.48 fastmap_1.2.0 utf8_1.2.4
## [61] clue_0.3-65 KernSmooth_2.23-20 shape_1.4.6.1
## [64] stringi_1.8.4 parallel_4.1.2 Rcpp_1.0.13
## [67] vctrs_0.6.5 png_0.1-8 tidyselect_1.2.1
## [70] xfun_0.46